In [1]:
# Report the versions of the `gopca` and `genometools` packages that were
# used to run this notebook.
import gopca
import genometools

for package in (gopca, genometools):
    # `__name__` of a top-level package is its import name, so the label
    # reads "gopca version:" / "genometools version:".
    print(package.__name__ + ' version:', package.__version__)
In [2]:
import os

# Every file fetched by this notebook is stored below this directory.
download_dir = 'download'

# Gene Ontology, release 2016-01-18 (OBO format).
gene_ontology_file = os.path.join(download_dir, 'go-basic_2016-01-18.obo')
gene_ontology_url = 'http://viewvc.geneontology.org/viewvc/GO-SVN/ontology-releases/2016-01-18/go-basic.obo'

# GO-derived gene sets (human).
go_gene_sets_file = os.path.join(download_dir, 'GO_gene_sets_human.tsv')
go_gene_sets_url = 'https://www.dropbox.com/s/s9osj0lfnoonjtt/GO_gene_sets_human_ensembl83_goa153_ontology2016-01-18.tsv?dl=1'

# Gene expression data.
gene_expression_file = os.path.join(download_dir, 'dmap_expression.tsv')
gene_expression_url = 'https://www.dropbox.com/s/obn0imd623yul7i/dmap_expression_mapped.tsv?dl=1'

# Disabled inputs: these definitions are kept commented out for reference
# (the matching download steps in the notebook are commented out as well).
#gene2accession_url = 'https://www.dropbox.com/s/bemduo72bu9fe6f/gene2accession_2016-01-18_human.tsv.gz?dl=1'
#gene2accession_file = os.path.join(download_dir, 'gene2accession_2016-01-18_human.tsv.gz')
#gene_annotation_url = 'ftp://ftp.ensembl.org/pub/release-83/gtf/homo_sapiens/Homo_sapiens.GRCh38.83.gtf.gz'
#gene_annotation_file = os.path.join(download_dir, 'Homo_sapiens.GRCh38.83.gtf.gz')
#go_annotation_url = 'ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/old/HUMAN/gene_association.goa_human.153.gz'
#go_annotation_file = os.path.join(download_dir, 'gene_association.goa_human.153.gz')
In [3]:
# Make the notebook use (almost) the entire width of the screen by injecting
# a CSS rule that overrides the width of the `.container` element.
# `HTML` and `display` come from the public `IPython.display` module;
# importing `display` from the internal `IPython.core.display` module is
# deprecated.
from IPython.display import HTML, display
display(HTML("""
<style>
.container { width:95% !important; }
</style>"""))

# Set up plotting with plotly.
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
init_notebook_mode()  # embed plotly graphs in the notebook

import genometools
from genometools import misc
import gopca

# Logger obtained from the `misc.get_logger()` helper of genometools.
logger = misc.get_logger()
In [4]:
import os
from genometools import misc

# Create the download directory if necessary.
misc.make_sure_dir_exists(download_dir)

# Disabled downloads, kept for reference (their URL/path variables are
# commented out where the download locations are configured):
#   gene2accession file:      misc.http_download(gene2accession_url, gene2accession_file)
#   Ensembl gene annotations: misc.ftp_download(gene_annotation_url, gene_annotation_file)
#   GO annotations:           misc.ftp_download(go_annotation_url, go_annotation_file)

# Files fetched with `misc.http_download`, in download order.
http_downloads = [
    (gene_ontology_url, gene_ontology_file),      # Gene Ontology
    (go_gene_sets_url, go_gene_sets_file),        # GO-derived gene sets
    (gene_expression_url, gene_expression_file),  # gene expression data
]

for source_url, target_path in http_downloads:
    # Only download when no file exists at the target path yet.
    if not os.path.isfile(target_path):
        misc.http_download(source_url, target_path)
In [5]:
# Load the three inputs of the analysis from the files configured earlier in
# the notebook. The resulting objects (`matrix`, `go_gene_sets`,
# `gene_ontology`) are handed to GO-PCA in the next step.
from genometools.basic import GeneSetCollection
from genometools.ontology import GeneOntology
from genometools.expression import ExpMatrix
# read the expression data (tab-separated file)
matrix = ExpMatrix.read_tsv(gene_expression_file)
# read the GO-derived gene sets (tab-separated file)
go_gene_sets = GeneSetCollection.read_tsv(go_gene_sets_file)
# read the Gene Ontology (OBO file)
gene_ontology = GeneOntology.read_obo(gene_ontology_file)
In [6]:
# Configure and run GO-PCA on the expression matrix, gene sets and ontology
# loaded above.
from gopca import GOPCA, GOPCAParams
params = GOPCAParams() # contains GO-PCA default parameter values
# NOTE(review): `simple_setup` presumably builds a ready-to-run analysis
# object from these inputs — confirm against the gopca documentation.
analysis = GOPCA.simple_setup(matrix, params, go_gene_sets, gene_ontology)
# `run` holds the result of the analysis; the plotting steps below read
# `run.sig_matrix` from it.
run = analysis.run()
In [7]:
# generate the signature matrix figure from the GO-PCA result
# (width/height are presumably given in pixels — TODO confirm)
fig = run.sig_matrix.get_figure(width=1200, height=800)
# render the figure inline with plotly's `iplot`
iplot(fig)
In [8]:
# look up the signature in the GO-PCA result
# (note that we don't have to specify the full signature name)
sig = run.sig_matrix.get_signature('DNA strand')
# generate a heatmap for that signature; the signature matrix is provided
# in order to show the samples in the same ordering
fig = sig.get_figure(run.sig_matrix)
# render the heatmap inline with plotly's `iplot`
iplot(fig)
Copyright (c) 2016 Florian Wagner.
This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.